

* Load the person-year panel and attach the first-step manager fixed effects.
use "$home\persondata_v2", clear

* log() returns missing for zero or negative earnings.
gen log_income=log(lonind)

* pnr is temporarily renamed to pnrm for this merge, so the merge key has to be
* pnrm: after the rename there is no variable called pnr in memory and
* "merge m:1 pnr" stops with "variable pnr not found". The same using file is
* merged on pnrm later in this script.
ren pnr pnrm
merge m:1 pnrm using "$project\managerfe_1step_over5", keep(1 3) nogen
ren pnrm pnr

* Build two zip-code lookup files on disk; the panel in memory is put back by
* the closing restore.
preserve

* (1) Per zip code: mean, 10th and 90th percentile of 1995 log income.
keep if year==1995
merge m:1 pnr year using "$home\Community\address", keep(master match) keepusing(postnummer) nogenerate
keep log_income postnummer
foreach stat in top mean bottom {
	generate zipincome_`stat'=log_income
}
collapse (mean) zipincome_mean (p10) zipincome_bottom (p90) zipincome_top, by(postnummer)
save "$home\Community\zipincome_1995", replace

* (2) Per person: the zip code recorded in the earliest year that has one.
use "$home\Community\address", clear
keep if postnummer!=.
bysort pnr (year): keep if year==year[1]
keep pnr postnummer
save "$home\Community\firstzip", replace

restore

* Attach each person's first zip code, then that zip code's 1995 income statistics.
merge m:1 pnr using "$home\Community\firstzip", keep(master match) nogenerate
merge m:1 postnummer using "$home\Community\zipincome_1995", keep(master match) nogenerate

* manager==1 for person-years that also appear in the manager file.
merge 1:1 pnr year using "$project\manager\manager_all", keep(master match) generate(temp_src)
generate byte manager=(temp_src==3)
drop temp_src log_income_firm

* Running row counter within person.
sort pnr
by pnr: generate count=_n

/* Rename register variables to readable names (old names on the left list, new names on the right) */
rename (akasst arblhu koen IE_TYPE arledgr brutto perindkp civst tlonkval aldernov koejd) ///
       (year_uifund ui sex immigrant frac_unemployed gross_income total_income married quality_hour age housing)

generate ln_hrwage=log(timelon)

* Four education groups from the leading digits of hffsp; codes that fall
* between the listed ranges, and missing codes, stay missing.
generate max_educ=int(hffsp/1000000)
generate byte educ=cond(max_educ<=25, 1, ///
                   cond(max_educ==35, 2, ///
                   cond(max_educ>=40 & max_educ<=50, 3, ///
                   cond(max_educ>=60 & max_educ<., 4, .))))
drop max_educ

generate byte female=(sex==2)

*tenure
* Tenure is the calendar year minus the recorded hiring year (ansaar); values
* above total experience are discarded.
destring ansaar, replace
generate int tenure=year-ansaar
replace tenure=. if tenure>exper

* Fallback for missing tenure: years since the person is first observed at the
* workplace (lbnr), but only when that spell starts after the person's first
* year in the panel.
sort pnr lbnr
by pnr lbnr: egen temp_spellstart=min(year)
by pnr: egen temp_panelstart=min(year)
replace tenure=year-temp_spellstart if tenure==. & temp_spellstart!=temp_panelstart
drop temp* ansaar

*age/tenure of becoming manager
*first job industry
*internally or externally promoted

* First calendar year in which the person is flagged as manager.
gen temp=year if manager==1
bysort pnr: egen minyear_manager=min(temp)
drop temp

* tsset sorts by pnr year (so the plain "by pnr:" below is valid) and enables L.
tsset pnr year
gen temp=age if year==minyear_manager
gen temp2=tenure if year==minyear_manager
gen double temp3=branche if year==minyear_manager
* 1 when the employer (cvrnr) is the same as in the previous year.
gen temp4=(cvrnr==L.cvrnr) if year==minyear_manager

* At most one row per person is non-missing, so total() just copies that value
* to every row of the person; people who never become manager get 0.
by pnr: egen byte first_age = total(temp)
by pnr: egen byte first_tenure = total(temp2)
* Stored as long, not byte: a byte holds only -127..100, and an industry code
* outside that range would silently be stored as missing.
by pnr: egen long first_industry = total(temp3)
by pnr: egen byte first_promote = total(temp4)
drop temp*

* First year the person appears in the panel.
bysort pnr: egen minyear=min(year)

*first firm inequality
* Cells are workplace-years (lbnr x year). Workers are ranked by log_income
* within the cell; ties are ordered arbitrarily by sort.
sort lbnr year log_income
by lbnr year: egen meanwage_firm=mean(log_income)
by lbnr year: gen incomerank=_n
by lbnr year: gen nworker=_N

* Incomes of the bottom/top 10% (at least the single lowest/highest earner in
* cells with fewer than 10 workers) and of the bottom/top half of the ranking.
gen temp_b10=log_income if incomerank<=0.1*nworker | (incomerank==1 & nworker<10)
gen temp_t10=log_income if incomerank>0.9*nworker | (incomerank==nworker & nworker<10)
gen temp_b50=log_income if incomerank<=0.5*nworker
gen temp_t50=log_income if incomerank>0.5*nworker
foreach part in b10 t10 b50 t50 {
	by lbnr year: egen temp_m`part'=mean(temp_`part')
}

* Year before the first manager year, floored at 1995 (missing stays missing).
gen tempyear=cond(minyear_manager-1<1995, 1995, minyear_manager-1)

* Three reference years, each with its own naming suffix:
*   1 = pre-manager year, 2 = first manager year, 3 = first panel year.
local ref1 tempyear
local ref2 minyear_manager
local ref3 minyear
local tail1 _pre
local tail2
local tail3 _1995
local half1 2_pre
local half2 2
local half3 _19952
local sources meanwage_firm temp_mb10 temp_mt10 temp_mb50 temp_mt50

* For each reference year, copy the cell statistic observed in that year to
* every row of the person (0 when the person has no row in that year).
sort pnr
forvalues r=1/3 {
	local targets first_meanfirmwage`tail`r'' first_bottomfirmwage`tail`r'' first_topfirmwage`tail`r'' first_bottomfirmwage`half`r'' first_topfirmwage`half`r''
	forvalues s=1/5 {
		local src : word `s' of `sources'
		local tgt : word `s' of `targets'
		gen temp_pick=`src' if year==`ref`r''
		by pnr: egen `tgt'=sum(temp_pick)
		drop temp_pick
	}
}
drop temp*

*first job income rank & occupation
* Each expression is evaluated in the person's first panel year (minyear) and
* the result is copied to all rows of the person; 0 if it is missing there.
sort pnr year
local exprs   incomerank/nworker int(isco88/1000) branche arbkom pstill
local targets incomerank_1995_firm occupation_1995 industry_1995 city_1995 pstill_1995
forvalues k=1/5 {
	local e : word `k' of `exprs'
	local t : word `k' of `targets'
	gen temp=`e' if year==minyear
	by pnr: egen `t'=sum(temp)
	drop temp
}

*number of jobs before manager
* Counts the distinct employers (cvrnr) observed in years before the first
* manager year, and stores the count on every row of the person.
* NOTE(review): when minyear_manager is missing, "year<minyear_manager" is true
* for every year (missing compares as larger than any number), so all of the
* person's employers are counted - confirm this is intended.
gen temp=cvrnr if year<minyear_manager
* NOTE(review): nvals() is not a built-in egen function; presumably it comes
* from the user-written egenmore package - confirm it is installed.
bysort pnr: egen temp2=nvals(temp)
by pnr: egen njob_premanager=mean(temp2)
* People with no pre-manager employer on record get 0 rather than missing.
replace njob_premanager=0 if njob_premanager==.
drop temp*

*public sector experience
* 1 if the person has at least one year with persbrc in [840000, 850000).
* Uses plain "by", so it relies on the data already being sorted by pnr.
by pnr: egen byte publicsector=max(persbrc>=840000 & persbrc<850000)

*unemployment experience
* tsset sorts by pnr year and enables the lag operators used below.
tsset pnr year

* A year counts as following an unemployment spell when the person has an
* employer now, had none last year, and had one two or three years ago.
gen byte temp=(cvrnr!=. & L.cvrnr==. & (L2.cvrnr!=. | L3.cvrnr!=.))

* A year also counts when frac_unemployed exceeds 250. The "<." guard matters:
* missing compares as larger than any number, so without it every year with a
* missing frac_unemployed would be flagged as unemployment.
replace temp=1 if frac_unemployed>250 & frac_unemployed<.

* unemp==1 if any year of the person is flagged.
by pnr: egen temp2=total(temp)
gen byte unemp=(temp2>0)
drop temp*

*UI
* Negative ui values are set to zero; receive_ui==1 if the person has any year
* with a positive, non-missing ui.
replace ui=0 if ui<0
gen byte temp=(ui>0 & ui<.)
bysort pnr: egen byte receive_ui=max(temp)
drop temp

*founder

* int(lbnr/1000000) is subtracted from the calendar year below; rows where it
* equals 1980 are excluded.
gen double temp_found=int(lbnr/1000000)
gen year_join=year-tenure-temp_found if temp_found!=1980
gen firmage=year-temp_found if temp_found!=1980

* owner0: year_join is exactly 0; owner1: year_join is at most 1.
* Both stay missing when year_join is missing.
gen owner0=(year_join==0) if year_join!=.
gen owner1=(year_join<=1) if year_join!=.

* founder1/founder2 flag people with at least one owner0/owner1 year.
sort pnr
by pnr: egen temp=sum(owner0)
by pnr: egen temp2=sum(owner1)
gen byte founder1=(temp>0)
gen byte founder2=(temp2>0)
drop temp* owner* year_join firmage

*income rank
* Relative position (rank / cell size) of log_income within year x birth-year
* cells, optionally split further by sex, education, industry or municipality.
*
* The rank is taken in the person's first panel year only (year==minyear), in
* line with incomerank_1995_firm. Summing the relative rank over every year of
* the person would give a number that grows with the number of years observed
* and is not a rank.
* NOTE(review): rows with missing log_income sort last and so receive the
* highest ranks in their cell - confirm this is acceptable.

gen birth_year=year-age

local by_cohort
local by_cohortsex  female
local by_cohorteduc educ
local by_cohortind  branche
local by_cohortcity arbkom

foreach tag in cohort cohortsex cohorteduc cohortind cohortcity {
	local cell year birth_year `by_`tag''
	sort `cell' log_income
	by `cell': gen temp=_n/_N if year==minyear
	* One non-missing value per person at most, so total() copies it to all rows.
	bysort pnr: egen incomerank_1995_`tag'=total(temp)
	drop temp
}

*father occupation and income rank
* Link each person to father (pnrf) and mother (pnrm) and attach the parent files.
merge m:1 pnr using "$home/pnrfm",  keep(1 3) nogen
merge m:1 pnrf using "$home/fathers", keep(1 3) nogen
merge m:1 pnrm using "$home/mothers", keep(1 3) nogen

* Look up each parent's 1995 earnings in the person file. The person's own
* pnr/year are parked in pnr0/year0 while the parent id plays the role of pnr.
* keep(1 3) stops the merges from appending every unmatched row of
* persondata_v2; those rows carry no person id and would only be removed again
* by the later "drop if pnr==.".
ren pnr pnr0
ren year year0
gen year=1995
ren pnrf pnr
cap drop lonind
merge m:1 pnr year using "$home\persondata_v2", keep(1 3) keepus(lonind) nogen
ren lonind incomef
ren pnr pnrf
ren pnrm pnr
merge m:1 pnr year using "$home\persondata_v2", keep(1 3) keepus(lonind) nogen
ren lonind incomem
ren pnr pnrm
ren pnr0 pnr
drop year
ren year0 year

* Parents without a 1995 earnings record count as zero income.
replace incomef=0 if incomef==.
replace incomem=0 if incomem==.
gen log_income_family=log(1+incomef+incomem)

* Parents' education groups from the 1994 codes (f = father, m = mother), with
* the same cut-offs as the person's own educ; a missing code becomes group 0.
foreach p in f m {
	gen max_educ`p'=int(hffsp`p'1994/1000000)
	gen byte educ`p'=cond(max_educ`p'<=25, 1, ///
	                 cond(max_educ`p'==35, 2, ///
	                 cond(max_educ`p'>=40 & max_educ`p'<=50, 3, ///
	                 cond(max_educ`p'>=60 & max_educ`p'<., 4, ///
	                 cond(max_educ`p'==., 0, .)))))
}
drop max_educ*

*merge fathers and mothers education
merge m:1 pnrf using "$home/fathers2", keep(master match) nogenerate
merge m:1 pnrm using "$home/mothers2", keep(master match) nogenerate

* Missing parental education / position codes are recoded to 0.
foreach v of varlist educm educf pstillm pstillf {
	replace `v'=0 if `v'==.
}

*union member union rate

* union is 1 when fag is positive and missing when fag is missing.
merge m:1 pnr year using "$home/persondata_v3", keep(master match) keepusing(fag) nogenerate
generate byte union=(fag>0) if fag!=.

* maxunion==1 if the person has union==1 in any year.
sort pnr
by pnr: egen temp_nunion=sum(union)
generate byte maxunion=(temp_nunion>0)
drop temp_nunion

* Share of union==1 at the employer (cvrnr) in the person's first manager
* year, copied to all rows of the person (0 if unavailable).
sort cvrnr year
by cvrnr year: egen temp_rate=mean(union)
generate temp_pick=temp_rate if year==minyear_manager
sort pnr
by pnr: egen first_unionrate=sum(temp_pick)
drop temp*

* Business-degree indicator from the education file.
merge m:1 pnr using "$education/business_detailed", keep(1 3) nogen
replace business=(business_ma==1 | business_ba==1)

gen expersq=exper^2

drop if pnr==.

* Re-attach the manager fixed effects. pnrm currently holds the mother's id,
* so it is dropped before pnr is renamed to pnrm for the merge.
drop pnrm
ren pnr pnrm
merge m:1 pnrm using "$project\managerfe_1step_over5", keep(1 3) nogen
ren pnrm pnr
gen pe=me_bayes

*merge m:1 pnr using "$project\managerfe_weighted", keep(1 3) nogen

* Reduce to one row per person with a non-missing fixed effect. The rows are
* ordered by year within person so that the row kept is always the earliest
* one; with a plain "bysort pnr:" the order within a person is arbitrary and
* the year-varying covariates kept could change from run to run.
cap drop count
bysort pnr (year): gen count=_n
by pnr: egen meanemp=mean(nworker)

keep if count==1 & pe!=.
drop count
* Consecutive integer id, used in the exported files in place of pnr.
egen pnrg=group(pnr)
compress
save "$project\manager\manager_characteristics_v3", replace

*Table 5
* Regressions of the manager fixed effect (pe) on the characteristics saved above.

use "$project\manager\manager_characteristics_v3", clear

*OLS with top features (column 3)
* zipincome* expands to every variable whose name starts with zipincome.

reg pe female business log_income_family zipincome* first_meanfirmwage_1995  first_topfirmwage_19952 first_bottomfirmwage incomerank_1995_firm incomerank_1995_cohort

*all features
* The variables listed in absorb() enter as fixed effects.
* NOTE(review): reghdfe is a user-written command - confirm it is installed.

reghdfe pe female business_* child06 immigrant log_income_family exper* zipincome* first_promote first_tenure first_meanfirmwage* first_topfirmwage* first_bottomfirmwage* incomerank_1995* publicsector unemp founder* receive_ui maxunion first_unionrate, absorb(birth_year educ educm educf married first_age first_industry njob_premanager occupation_1995 industry_1995 city_1995 pstill_1995)

* Keep only the observations used in the regression just above.
keep if e(sample)==1

* Expand each categorical variable into indicator variables <stub>1, <stub>2,
* ... and drop the first one of each set.
local cats  educ birth_year married njob_premanager first_industry first_age occupation_1995 industry_1995 pstill_1995
local stubs educd birthyear marriedd njob_premanagerd first_industryd firstage occupation_1995d industry_1995d pstill_1995d
local ncat : word count `cats'
forvalues k=1/`ncat' {
	local c : word `k' of `cats'
	local s : word `k' of `stubs'
	tab `c', gen(`s')
	drop `s'1
}

* Flag the people that also appear in the target_manager file.
merge 1:1 pnr using "$project\python\target_manager", keep(master match) generate(temp_src)
generate target=(temp_src==3)

* From here on the consecutive id pnrg takes the place of the original pnr.
drop pnr
rename pnrg pnr

* pnr2 equals pnr, shifted by 110000 for target people.
generate pnr2=cond(target==1, pnr+110000, pnr)

* Columns written to the export: ids and outcome, covariates, indicator sets.
local ids     pnr pnr2 pe minyear_manager
local covars  female business_* child06 immigrant log_income_family exper* zipincome* first_promote first_tenure first_meanfirmwage* first_topfirmwage* first_bottomfirmwage* incomerank_1995* publicsector unemp founder* receive_ui maxunion first_unionrate
local dummies educd* birthyear* marriedd* njob_premanagerd* first_industryd* firstage* occupation_1995d* industry_1995d* pstill_1995d*
keep `ids' `covars' `dummies'

sort pnr

export delimited using "I:\Workdata\702525\Alex\MA\python\manager_characteristics_v3.csv", nolabel replace

* 200 placebo files: in each pass the pe values are permuted across rows and
* the data are exported. Every pass permutes the result of the previous one.
forvalues i=1/200 {

	set seed `i'

	* Give each row a random position ...
	generate double perm_draw=runiform()
	sort perm_draw
	generate long perm_pos=_n

	* ... then re-sort by pe and hand each row the pe value found at its
	* random position.
	rename pe pe_sorted
	sort pe_sorted
	generate pe=pe_sorted[perm_pos]
	drop pe_sorted perm_draw perm_pos

	export delimited using "I:\Workdata\702525\Alex\MA\python\manager_characteristics_placebo_`i'.csv", nolabel replace

}

*run "predict.ipynb" to get predicted manager FE and feature coefficients


*Plot actual FE against predicted FE
* NOTE(review): var2 and var4 are not created anywhere in this do-file;
* presumably the notebook's output has to be imported before this section is
* run - confirm and add the import step.

* Replace decimal commas with decimal points so the strings convert to numbers.
replace var2=subinstr(var2,",",".",.)
replace var4=subinstr(var4,",",".",.)
destring var*, replace
corr var2 var4
* percentile = relative position of each row when ordered by var2.
sort var2
gen percentile=_n/_N
* quantile = decile group (1-10) of var4.
xtile quantile=var4, n(10)
* Means of var2 and percentile within each decile of var4.
collapse var2 percentile, by(quantile)
tw connected percentile quantile
